# topic 18 # This will be remarkably similar to topics 15-17, so # much so that it would be worth comparing the four scripts. # First, set up the situation. We have a population # with an unknown standard deviation, and for that matter, # an unknown mean, but we are not worried about that. # We do need to know that the population is approximately # normally distributed. # source("../gnrnd5.R") source("../gnrnd4.R") gnrnd5( 138579399904, 432001673) # population size is 4000 items big_pop <- L1 # We do not know the standard deviation of big_pop # Now, someone says that they believe that the # standard deviation in big_pop is 50. # Thus our null hypothesis is that the standard # deviation in big_pop is 50. We want to look at an # alternative that says the standard deviation is # less than 50. This is a one-tailed test. # We will get a sample of size 83 and we will # look at the standard deviation of the sample. # We are willing to be wrong in telling the # person that they are wrong 5% of the time! # That is, even if the true standard deviation in # big_pop is 50, we are willing to reject the null # hypothesis for 5% of the random samples that we take. # ######################## ## The critical value approach. We know that ## we can use the fact that the ratio of (n-1) times ## the variance of the sample to the ## variance of the population will be a ## chi-squared distribution with n-1 degrees of freedom. ## ## So, if we assume the null hypothesis is true then, ## for a sample of size 83 we want to find the chi-squared ## value that has 5% of the area to its left. chi_value <- qchisq(0.05, 82) chi_value # this is our critical low value ## # Get our sample # the first time we do this let us get the # same sample each time gnrnd4(922138201, 400000001) L1 # take those as the index values of our random sample our_samp <- big_pop[ L1 ] our_samp # find the standard deviation of our sample samp_sd <- sd( our_samp ) samp_sd ## so now compare (n-1)*samp_sd^2/(50^2) ## to our critical low value. (82)*samp_sd^2/(50^2) ## In this case the ratio is not less than ## than our critical low. Therefore, at the ## 0.05 level of significance, we do not have evidence ## to reject null hypothesis that the true ## population standard deviation is 50 ## in favor of the alternative hypothesis that ## the true standard deviation is less than 50. ############# ############# the attained significance approach ## how strange would it be to get a ratio of ## 82*samp_sd^2/(50^2) if the true ## standard deviation is 50? # pchisq(82*samp_sd^2/(50^2), 82 ) # # That probability is not less than our 5% level # of significance. Therefore, do not reject the # null hypothesis in favor of the alternative. ####################### Now use the function to ####################### do the same thing source("../hypo_sigma.R") hypoth_test_sigma( 50, 83, samp_sd, -34, 0.05) ####################################### ####################################### # Now we want to repeat this process # but each time we want a different sample # of size 80 L1 <- sample( big_pop, 83 ) samp_sd <- sd( L1 ) hypoth_test_sigma( 50, 83, samp_sd, -34, 0.05) #### perform lines 95-97 again and again ### now, since we have the population let us peek # at the true standard deviation source("../pop_sd.R") true_sd <- pop_sd( big_pop ) true_sd ####### Try our samples again, but this time test ## the null hypothesis that the true standard ## deviation is 46.50194, and do the test at ## the 0.05 level of significance. L1 <- sample( big_pop, 83 ) samp_sd <- sd( L1 ) hypoth_test_sigma( true_sd, 83, samp_sd, -34, 0.05) #### perform lines 109-111 again and again, #### and we should see a Type I error about #### 5% of the time. ### we can actually do this 1000 times and see how ### times we reject the null hypothesis even ### though it is true. L2 <- 1:1000 for( i in 1:1000) { L1 <- sample( big_pop, 83 ) samp_sd <- sd( L1 ) answer <- hypoth_test_sigma( true_sd, 83, samp_sd, -34, 0.05) L2[i] <- answer[10] } table( L2 )